In [1]:
import pandas as pd
In [2]:
# Load the order lines produced by the previous step of the analysis.
df = pd.read_csv(filepath_or_buffer='result.csv')
# Bare expression so the notebook renders the (truncated) frame as the cell output.
df
Out[2]:
Order_ID Product Quantity Price Total Order_Date Address Month
0 176558 USB-C Charging Cable 2 11.95 23.90 2019-04-19 08:46:00 917 1st St, Dallas, TX 75001 4
1 176559 Bose SoundSport Headphones 1 99.99 99.99 2019-04-07 22:30:00 682 Chestnut St, Boston, MA 02215 4
2 176560 Google Phone 1 600.00 600.00 2019-04-12 14:38:00 669 Spruce St, Los Angeles, CA 90001 4
3 176560 Wired Headphones 1 11.99 11.99 2019-04-12 14:38:00 669 Spruce St, Los Angeles, CA 90001 4
4 176561 Wired Headphones 1 11.99 11.99 2019-04-30 09:27:00 333 8th St, Los Angeles, CA 90001 4
... ... ... ... ... ... ... ... ...
185945 259353 AAA Batteries (4-pack) 3 2.99 8.97 2019-09-17 20:56:00 840 Highland St, Los Angeles, CA 90001 9
185946 259354 iPhone 1 700.00 700.00 2019-09-01 16:00:00 216 Dogwood St, San Francisco, CA 94016 9
185947 259355 iPhone 1 700.00 700.00 2019-09-23 07:39:00 220 12th St, San Francisco, CA 94016 9
185948 259356 34in Ultrawide Monitor 1 379.99 379.99 2019-09-19 17:30:00 511 Forest St, San Francisco, CA 94016 9
185949 259357 USB-C Charging Cable 1 11.95 11.95 2019-09-30 00:18:00 250 Meadow St, San Francisco, CA 94016 9

185950 rows × 8 columns

In [4]:
# CSV stores timestamps as text; convert to datetime64 so the .dt accessor works below.
df['Order_Date'] = pd.to_datetime(df['Order_Date'])
In [6]:
# Hour of day (0-23) in which each order line was placed.
df['Hour'] = df['Order_Date'].dt.hour
In [7]:
# Weekday name (e.g. 'Friday') of the order timestamp.
df['Day_name'] = df['Order_Date'].dt.day_name()
In [8]:
# Display the frame to confirm the new Hour and Day_name columns were added.
df
Out[8]:
Order_ID Product Quantity Price Total Order_Date Address Month Hour Day_name
0 176558 USB-C Charging Cable 2 11.95 23.90 2019-04-19 08:46:00 917 1st St, Dallas, TX 75001 4 8 Friday
1 176559 Bose SoundSport Headphones 1 99.99 99.99 2019-04-07 22:30:00 682 Chestnut St, Boston, MA 02215 4 22 Sunday
2 176560 Google Phone 1 600.00 600.00 2019-04-12 14:38:00 669 Spruce St, Los Angeles, CA 90001 4 14 Friday
3 176560 Wired Headphones 1 11.99 11.99 2019-04-12 14:38:00 669 Spruce St, Los Angeles, CA 90001 4 14 Friday
4 176561 Wired Headphones 1 11.99 11.99 2019-04-30 09:27:00 333 8th St, Los Angeles, CA 90001 4 9 Tuesday
... ... ... ... ... ... ... ... ... ... ...
185945 259353 AAA Batteries (4-pack) 3 2.99 8.97 2019-09-17 20:56:00 840 Highland St, Los Angeles, CA 90001 9 20 Tuesday
185946 259354 iPhone 1 700.00 700.00 2019-09-01 16:00:00 216 Dogwood St, San Francisco, CA 94016 9 16 Sunday
185947 259355 iPhone 1 700.00 700.00 2019-09-23 07:39:00 220 12th St, San Francisco, CA 94016 9 7 Monday
185948 259356 34in Ultrawide Monitor 1 379.99 379.99 2019-09-19 17:30:00 511 Forest St, San Francisco, CA 94016 9 17 Thursday
185949 259357 USB-C Charging Cable 1 11.95 11.95 2019-09-30 00:18:00 250 Meadow St, San Francisco, CA 94016 9 0 Monday

185950 rows × 10 columns

In [10]:
# Per-hour summary of the 'Total' column:
#   'sum'   - revenue collected in that hour of the day
#   'count' - number of rows with a non-null Total in that hour
#
# 'Total' is selected BEFORE aggregating. Aggregating the whole frame first would
# also try to sum the text and datetime columns (Product, Address, Order_Date, ...),
# which is wasted work and raises TypeError on pandas >= 2.0, where unsupported
# columns are no longer dropped silently.
result = df.groupby('Hour')['Total'].agg(['sum', 'count'])
result
Out[10]:
sum count
Hour
0 713721.27 3910
1 460866.88 2350
2 234851.44 1243
3 145757.89 831
4 162661.01 854
5 230679.82 1321
6 448113.00 2482
7 744854.12 4011
8 1192348.97 6256
9 1639030.58 8748
10 1944286.77 10944
11 2300610.24 12411
12 2316821.34 12587
13 2155389.80 12129
14 2083672.73 10984
15 1941549.60 10175
16 1904601.31 10384
17 2129361.61 10899
18 2219348.30 12280
19 2412938.54 12905
20 2281716.24 12228
21 2042000.86 10921
22 1607549.21 8822
23 1179304.44 6275
In [11]:
# Five hours of the day with the highest revenue, best first.
(
    result
    .sort_values(by='sum', ascending=False)
    .head(n=5)
)
Out[11]:
sum count
Hour
19 2412938.54 12905
12 2316821.34 12587
11 2300610.24 12411
20 2281716.24 12228
18 2219348.30 12280
In [12]:
# Five hours of the day with the most purchases, best first.
(
    result
    .sort_values(by='count', ascending=False)
    .head(n=5)
)
Out[12]:
sum count
Hour
19 2412938.54 12905
12 2316821.34 12587
11 2300610.24 12411
18 2219348.30 12280
20 2281716.24 12228
In [13]:
import matplotlib.pyplot as plt
In [25]:
# Bar chart of revenue per hour of day, with each bar's value written above it.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(result.index, result['sum'])
# Print y tick values as plain numbers instead of scientific notation.
ax.ticklabel_format(axis='y', style='plain')

ax.set_xticks(range(0, 24))
# Extend the axis well above the tallest bar so the rotated value labels fit.
ax.set_yticks(range(0, int(round(result['sum'].max())) + 1500000, 500000))

# Axis labels are in Russian: 'Часы' = hours, 'Выручка в $' = revenue in $.
ax.set_xlabel('Часы')
ax.set_ylabel('Выручка в $')

# Anchor every label at its hour (the index value) rather than at its row
# position, so labels stay above the right bar even if some hour has no rows.
for hour, revenue in result['sum'].items():
    ax.text(
        hour,
        revenue + 100000,  # vertical offset so the text starts above the bar
        '{0:,}'.format(round(revenue)).replace(',', ' '),  # 1234567 -> '1 234 567'
        rotation=90,
        size=10,
        color='#000',
        ha='center')

ax.grid()
plt.show()
In [27]:
# Bar chart of the number of purchases per hour of day, with each bar's value
# written above it; the figure is also saved to 'Count.png'.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(result.index, result['count'])
# Print y tick values as plain numbers instead of scientific notation.
ax.ticklabel_format(axis='y', style='plain')

ax.set_xticks(range(0, 24))
# Extend the axis well above the tallest bar so the rotated value labels fit.
ax.set_yticks(range(0, int(round(result['count'].max())) + 5000, 1000))

# Axis labels are in Russian: 'Часы' = hours, 'Кол-во покупок' = number of purchases.
ax.set_xlabel('Часы')
ax.set_ylabel('Кол-во покупок')

# Anchor every label at its hour (the index value) rather than at its row
# position, so labels stay above the right bar even if some hour has no rows.
for hour, purchases in result['count'].items():
    ax.text(
        hour,
        purchases + 500,  # vertical offset so the text starts above the bar
        '{0:,}'.format(round(purchases)).replace(',', ' '),  # 12905 -> '12 905'
        rotation=90,
        size=10,
        color='#000',
        ha='center')

ax.grid()
# Save before plt.show(): the figure may be released once it has been shown.
fig.savefig('Count.png', dpi=100)
plt.show()
In [26]:
# Write the frame, now including the derived Hour and Day_name columns, back to disk.
# NOTE: this is the same path the notebook loaded from, so the input file is
# overwritten in place.
df.to_csv(path_or_buf='result.csv', index=False)
In [ ]: